{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Stochastic Gradient Descent Regression Part 2"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# fix_yahoo_finance is used to fetch data \n",
        "import fix_yahoo_finance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2018-08-27'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View Columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 downloaded\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "            Open  High   Low  Close  Adj Close    Volume\nDate                                                    \n2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.85</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset = dataset.reset_index()"
      ],
      "outputs": [],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],'Increase','Decrease')\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
        "dataset['Return'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "        Date  Open  High   Low  Close  Adj Close    Volume Increase_Decrease  \\\n1 2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200          Increase   \n2 2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300          Increase   \n3 2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100          Decrease   \n4 2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700          Decrease   \n5 2014-01-09  4.20  4.23  4.05   4.09       4.09  30667600          Decrease   \n\n   Buy_Sell_on_Open  Buy_Sell    Return  \n1                 1         1  0.012658  \n2                 1         1  0.032500  \n3                 1         0  0.012107  \n4                 0         0  0.000000  \n5                 0         1 -0.021531  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Date</th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Return</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>2014-01-03</td>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n      <td>Increase</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.012658</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2014-01-06</td>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n      <td>Increase</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.032500</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2014-01-07</td>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n      <td>Decrease</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0.012107</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2014-01-08</td>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n      <td>Decrease</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>2014-01-09</td>\n      <td>4.20</td>\n      <td>4.23</td>\n      <td>4.05</td>\n      <td>4.09</td>\n      <td>4.09</td>\n      <td>30667600</td>\n      <td>Decrease</td>\n      <td>0</td>\n      <td>1</td>\n      <td>-0.021531</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.tail()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 20,
          "data": {
            "text/plain": "           Date       Open       High        Low      Close  Adj Close  \\\n1167 2018-08-21  19.980000  20.420000  19.860001  20.400000  20.400000   \n1168 2018-08-22  20.280001  20.920000  20.209999  20.900000  20.900000   \n1169 2018-08-23  21.190001  22.320000  21.139999  22.290001  22.290001   \n1170 2018-08-24  22.910000  24.000000  22.670000  23.980000  23.980000   \n1171 2018-08-27  24.940001  27.299999  24.629999  25.260000  25.260000   \n\n         Volume Increase_Decrease  Buy_Sell_on_Open  Buy_Sell    Return  \n1167   55629000          Increase                 1         1  0.021021  \n1168   62002700          Increase                 1         1  0.024510  \n1169  113444100          Increase                 1         1  0.066507  \n1170  164328200          Increase                 1         1  0.075819  \n1171  325058400          Decrease                 0         0  0.053378  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Date</th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Return</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1167</th>\n      <td>2018-08-21</td>\n      <td>19.980000</td>\n      <td>20.420000</td>\n      <td>19.860001</td>\n      <td>20.400000</td>\n      <td>20.400000</td>\n      <td>55629000</td>\n      <td>Increase</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.021021</td>\n    </tr>\n    <tr>\n      <th>1168</th>\n      <td>2018-08-22</td>\n      <td>20.280001</td>\n      <td>20.920000</td>\n      <td>20.209999</td>\n      <td>20.900000</td>\n      <td>20.900000</td>\n      <td>62002700</td>\n      <td>Increase</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.024510</td>\n    </tr>\n    <tr>\n      <th>1169</th>\n      <td>2018-08-23</td>\n      <td>21.190001</td>\n      <td>22.320000</td>\n      <td>21.139999</td>\n      <td>22.290001</td>\n      <td>22.290001</td>\n      <td>113444100</td>\n      <td>Increase</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.066507</td>\n    </tr>\n    <tr>\n      <th>1170</th>\n      <td>2018-08-24</td>\n      <td>22.910000</td>\n      <td>24.000000</td>\n      <td>22.670000</td>\n      <td>23.980000</td>\n      <td>23.980000</td>\n      <td>164328200</td>\n      <td>Increase</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.075819</td>\n    </tr>\n    <tr>\n      <th>1171</th>\n      <td>2018-08-27</td>\n      <td>24.940001</td>\n      <td>27.299999</td>\n      <td>24.629999</td>\n      <td>25.260000</td>\n      <td>25.260000</td>\n      <td>325058400</td>\n      <td>Decrease</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.053378</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 20,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.shape"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/plain": "(1171, 11)"
          },
          "metadata": {}
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.linear_model import SGDRegressor"
      ],
      "outputs": [],
      "execution_count": 31,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = np.array(dataset['Open']).reshape(1171,-1)\n",
        "Y = np.array(dataset['Adj Close']).reshape(1171,-1)"
      ],
      "outputs": [],
      "execution_count": 32,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "sgd = SGDRegressor()\n",
        "sgd.fit(X, Y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 34,
          "data": {
            "text/plain": "SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,\n       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',\n       loss='squared_loss', max_iter=None, n_iter=None, penalty='l2',\n       power_t=0.25, random_state=None, shuffle=True, tol=None, verbose=0,\n       warm_start=False)"
          },
          "metadata": {}
        }
      ],
      "execution_count": 34,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "sgd.coef_"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 35,
          "data": {
            "text/plain": "array([0.99823223])"
          },
          "metadata": {}
        }
      ],
      "execution_count": 35,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "sgd.intercept_ "
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 36,
          "data": {
            "text/plain": "array([0.00456104])"
          },
          "metadata": {}
        }
      ],
      "execution_count": 36,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "sgd.predict(Y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 37,
          "data": {
            "text/plain": "array([ 3.99748994,  4.12726013,  4.17717174, ..., 22.25515835,\n       23.94216982, 25.21990707])"
          },
          "metadata": {}
        }
      ],
      "execution_count": 37,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import r2_score # Continous and Binary\n",
        "r2_score(X,Y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 42,
          "data": {
            "text/plain": "0.9976756982529226"
          },
          "metadata": {}
        }
      ],
      "execution_count": 42,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "version": "3.5.5",
      "pygments_lexer": "ipython3",
      "file_extension": ".py",
      "codemirror_mode": {
        "version": 3,
        "name": "ipython"
      },
      "mimetype": "text/x-python",
      "nbconvert_exporter": "python",
      "name": "python"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.28.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}